##############################
# Media Measurement Matters  #
# Replication Code           #
# Main Analyses              #
##############################

# The following file contains code for replicating all results presented in our
# main manuscript, including Figures 1-5 and all reported statistics. This file
# uses both the cleaned survey data constructed in the 2_survey_cleaning.R file,
# as well as the cleaned web-tracking data constructed using the 1_web_data_prep.R file.

# We have provided access to several, cleaned data files that can be used without
# having to run these scripts, in both CSV and RDS form. The survey data is
# called survey_data_cleaned, and the web-browsing data is titled web_use.

# Set-Up ----

# If desired, set up path into which to save plots and tables
plot_path <- NULL
table_path <- NULL

# Load packages
library(tidyverse)
library(ggridges)
library(ggalluvial)
library(ggpubr)
library(estimatr)
library(overlap)
library(descr)
library(gtools)

# Set up helper operations
`%notin%` <- Negate(`%in%`)

# Set up colors
red_mit = '#A31F34'
red_light = '#A9606C'
blue_mit = '#315485'
grey_light= '#C2C0BF'
grey_dark = '#8A8B8C'
black = '#353132'

# Source helper functions
source("helper_functions.R")

# > Read in data ----

# Read in web data 
web <- read_rds("data/web_use.rds")

# Calculate the number of overall news visits by respondent
n_visits <- web %>% group_by(resp_id) %>% summarise(count = n())
median(n_visits$count) # Median news visits
mean(n_visits$count >= 20) # % of respondents with >=20 visits

# Read in survey data
srvy <- read_rds("data/survey_data_cleaned.rds")

# Figure 1: Distribution of news visits by stated media preferences ----

# Filter out portal sites
web_noport <- web %>% 
  filter(domain_recode %notin% c("aol.com", "msn.com", "google.com"))

# Plot the distribution of site visits by stated media preferences, excluding
# portal sites

fig1 <- visit_plots(var = "med_pref", var_labels = c("Prefer\nMSNBC", 
                                                     "Prefer\nEntertainment", 
                                                     "Prefer\nFox"),
                    var_levels = c("MSNBC", "Entertainment", "Fox"),
                    x_label = "Relative Slant of News Visits",
                    y_label = "Stated Media Preference", 
                    weights = FALSE, df = web_noport, 
                    exemplars = c("cnn.com","news.yahoo.com","msnbc.com",
                                  "foxnews.com","nytimes.com"),
                    x_limits = c(-1.1,1.1))

ggsave(fig1, path = plot_path, filename = "fig_1.pdf", 
       height=3.5, width=8, dpi = 600)

# Calculate overlapping coefficients for Fox vs. MSNBC, at both site- and
# respondent-level

visit_overlap_medpref <- visit_overlap(df = web_noport, group_var = "med_pref", 
                                       values = c("MSNBC", "Entertainment", "Fox"),
                                       out_contrasts = c("MSNBC vs. Fox",
                                                         "MSNBC vs. Entertainment",
                                                         "Fox vs. Entertainment"))

overlap_medpref <- score_overlap(var = "score_noportals", group_var = "med_pref", 
                                 values = c("MSNBC", "Entertainment", "Fox"),
                                 out_contrasts = c("MSNBC vs. Fox",
                                                   "MSNBC vs. Entertainment",
                                                   "Fox vs. Entertainment"))

visit_overlap_medpref$`MSNBC vs. Fox` # Site-level overlap
overlap_medpref$`MSNBC vs. Fox` # Respondent-level overlap

# Figure 2: Mapping discrepancies between revealed and stated preferences ----

# Create an alluvial diagram plotting the relationship between revealed media 
# preferences, stated media preferences, and media choice within the free-choice 
# group

# Identify proportion of respondents with conservative media diets who state a
# preference for Fox (and vice versa for MSNBC)

crosstab(srvy$score_code, srvy$med_pref, prop.r = T, plot = F)

# Construct labels for the different groups
code_labels_free <- gen_ranges(bin_var = "score_code", score_version = "score_noportals",
                               parentheses = TRUE, group = 0)
code_labels_free <- paste(c("More Liberal\nThan CNN\n", "Between CNN\nand Yahoo!\n", 
                            "More Conserv.\nThan Yahoo!\n"), 
                          code_labels_free, sep = "")

# Summarize frequencies for each combination of revealed preferences, stated preferences,
# and revealed media choice within the experiment
srvy_alluvial_code <- srvy %>%
  filter(forcedchoice == 0) %>%
  group_by(score_code, med_pref, med_choice) %>%
  summarise(Freq = n()) %>%
  mutate(score_code = factor(score_code,levels = 1:3,
                             labels = code_labels_free,ordered = T),
         med_pref = factor(med_pref,levels=c("MSNBC","Entertainment","Fox"),
                           labels=c("MSNBC","Entertainment","Fox News"),
                           ordered=T),
         med_choice = factor(med_choice,levels=c("MSNBC","Entertainment","Fox"),
                             labels=c("MSNBC","Entertainment","Fox News"),
                             ordered=T)) %>% 
  as.data.frame()

# Set theme for plotting
theme_set(theme_bw() + 
            theme(legend.position = "bottom",
                  plot.title = element_text(hjust = 0.5, face = "bold",size = 16),
                  plot.subtitle = element_text(hjust = 0.5, face = "italic", size = 12),
                  axis.title.x = element_text(margin = unit(c(3, 0, 0, 0), "mm"),
                                              face = "bold", size = 12, angle = 0, hjust = 0.5),
                  axis.title.y = element_text(margin = unit(c(0, 3, 0, 0), "mm"), 
                                              face = "bold", size = 12),
                  legend.title = element_text(face = "bold", hjust = 0.5, size = 12),
                  legend.text = element_text(hjust = 0.5, size = 10),
                  axis.text.y = element_text(size = 10, color = "black"),
                  axis.text.x = element_text(size = 10, color = "black"),
                  legend.box = "vertical",
                  legend.background = element_blank(),
                  legend.box.background = element_rect(colour = "black"),
                  text=element_text(colour=black, 
                                    size=15)))

(fig2 <- ggplot(na.omit(srvy_alluvial_code), 
                aes(y=Freq, axis1=score_code,
                    axis2=med_pref,axis3=med_choice)) + 
    geom_alluvium(aes(fill=score_code),width=1/7) + 
    geom_stratum(width=1/7, fill="black", color=grey_dark) + 
    geom_text(stat="stratum",aes(label=after_stat(stratum)),angle=c(-90),color="white",
              size=3.25) + 
    scale_x_discrete("", 
                     limits=c("score_code","med_pref","med_choice"),
                     labels=c("Web-Tracking:\nRelative Slant of\nNews Consumption",
                              "Survey:\nStated Media\nPreference","Survey:\nRevealed\nMedia Choice"), 
                     expand=c(0.05,0.05)) + 
    scale_y_continuous(limits = c(0, 1685)) + 
    scale_fill_manual(values=c(blue_mit, grey_dark, red_mit)) +
    ylab("Frequency") +
    theme_minimal() +
    theme(legend.position = "none",
          panel.grid.minor = element_blank(),
          panel.grid.major.x = element_blank(),
          axis.title.y = element_text(size = 12, color = "black", 
                                      margin = unit(c(0, 3, 0, 0), "mm")),
          axis.text.x = element_text(size = 12, color = "black", face = "bold"))
)

ggsave(fig2, path = plot_path, filename = "fig_2.pdf", 
       width=11, height=7, dpi = 600)

# Figure 3: Persuasion by revealed volume preferences ----

# Generate labels for plotting
consump_labels3 <- gen_ranges(bin_var = "news_consump_bin3", score_version = "news_consump",
                              parentheses = TRUE)
consump_labels3 <- paste0(c("Low News\nConsumption\n", "Medium News\nConsumption\n", 
                            "High News\nConsumption\n"), 
                          consump_labels3)

# Generate persuasion estimates for each subgroup
consump_vsent_fx3 <- vsent_plot(var = "news_consump", nbins = 3, 
                                labels = consump_labels3, weights = FALSE)

# Plot resulting estimates
(fig3 <- ggplot(na.omit(consump_vsent_fx3 %>% filter(id != "Fox vs.\nMSNBC")), 
                                   aes(x=factor(val),
                                       col = factor(id, levels = c("Fox vs.\nEntertainment",
                                                                   "MSNBC vs.\nEntertainment")),
                                       shape = factor(id, levels = c("Fox vs.\nEntertainment",
                                                                     "MSNBC vs.\nEntertainment")))) +
    geom_hline(yintercept=0, col = "white") +
    geom_hline(yintercept=0, linetype="dashed", color = grey_dark) +
    geom_errorbar(aes(ymin=min_cilo90, ymax=max_cihi90),
                  width=0, lwd = 1, position = position_dodge(width = 0.5)) +
    geom_errorbar(aes(ymin=min_cilo, ymax=max_cihi),
                  width=0, position = position_dodge(width = 0.5)) +
    geom_point(aes(y=naive),
               position = position_dodge(width = 0.5),
               size = 2) +
    facet_wrap(~ outcome,nrow=1) +
    scale_x_discrete(labels = unique(consump_vsent_fx3$bin)) + 
    xlab("Relative Volume of News vs. Non-News") +
    ylab("Average Treatment Effect of\nPartisan Media vs. Entertainment") +
    scale_y_continuous(breaks=seq(-0.2,0.2,0.1),
                       labels=plot_labels(break_val = 0.2, by_val = 0.1)$att,
                       limits = c(-0.225, 0.225),
                       sec.axis = dup_axis(name="",
                                           breaks = seq(-0.2,0.2,0.1),
                                           labels = plot_labels(break_val = 0.2, by_val = 0.1)$share)) +
    scale_colour_manual("Comparison",values=c(red_mit, blue_mit)) +
    scale_shape_manual("Comparison",values=c(16, 17, 15)) +
    theme(axis.text.x = element_text(size = 10, angle = 0, hjust = 0.5, color = "black")))

ggsave(fig3, path = plot_path, filename = "fig_3.pdf", 
       dpi = 600, width = 9, height = 5.25)

# Figure 4: Persuasion by revealed slant preferences ----

# > Panel (a): All news visits ----

# Generate labels for plotting
code_labels <- gen_ranges(bin_var = "score_code", score_version = "score_noportals",
                          parentheses = TRUE)
code_labels <- paste(c("More Liberal\nThan CNN\n", "Between CNN\nand Yahoo!\n", 
                       "More Conserv.\nThan Yahoo!\n"), 
                     code_labels, sep = "")

# Generate persuasion estimates for each subgroup
score_vsent_code <- group_vsent_plot(var = "score_code", nbins = 3, 
                                     labels = code_labels, 
                                     weights = FALSE)

# Plot results
(fig4_a <- ggplot(na.omit(score_vsent_code %>% filter(id != "Fox vs.\nMSNBC")), 
                                 aes(x=factor(val),
                                     col = factor(id, levels = c("Fox vs.\nEntertainment",
                                                                 "MSNBC vs.\nEntertainment")),
                                     shape = factor(id, levels = c("Fox vs.\nEntertainment",
                                                                   "MSNBC vs.\nEntertainment")))) +
    geom_hline(yintercept=0, col = "white") +
    geom_hline(yintercept=0, linetype="dashed", color = grey_dark) +
    geom_errorbar(aes(ymin=min_cilo90, ymax=max_cihi90),
                  width=0, lwd = 1, position = position_dodge(width = 0.5)) +
    geom_errorbar(aes(ymin=min_cilo, ymax=max_cihi),
                  width=0, position = position_dodge(width = 0.5)) +
    geom_point(aes(y=naive),
               position = position_dodge(width = 0.5),
               size = 2) +
    facet_wrap(~ outcome,nrow=1) +
    scale_x_discrete(labels = unique(score_vsent_code$bin)) + 
    xlab("Relative Slant of News Consumption") +
    ylab("Average Treatment Effect of\nPartisan Media vs. Entertainment") +
    scale_y_continuous(breaks=seq(-0.2,0.2,0.1),
                       labels=plot_labels(break_val = 0.2, by_val = 0.1)$att,
                       limits = c(-0.225, 0.225),
                       sec.axis = dup_axis(name="",
                                           breaks=seq(-0.2,0.2,0.1),
                                           labels = plot_labels(break_val = 0.2, by_val = 0.1)$share)) +
    scale_colour_manual("Comparison",values=c(red_mit, blue_mit)) +
    scale_shape_manual("Comparison",values=c(16, 17, 15)) +
    theme(axis.text.x = element_text(size = 10, hjust = 0.5, color = "black")) + 
    guides(color = "none", shape = "none")) 

ggsave(fig4_a, path = plot_path, filename = "fig_4_a.pdf", 
       width=9, height=4.75, dpi = 600)

# > Panel (b): Hard news visits only ----

# Construct labels for the different groups
code_labels_hard <- gen_ranges(bin_var = "score_code_hard", score_version = "score_hard",
                          parentheses = TRUE)
code_labels_hard <- paste(c("More Liberal\nThan CNN\n", "Between CNN\nand Yahoo!\n", 
                       "More Conserv.\nThan Yahoo!\n"), 
                       code_labels_hard, sep = "")

# Generate persuasion estimates for each subgroup
score_vsent_code_hard <- group_vsent_plot(var = "score_code_hard", nbins = 3, 
                                          labels = code_labels_hard, 
                                          weights = FALSE)

# Plot results
(fig4_b <- ggplot(na.omit(score_vsent_code_hard %>% filter(id != "Fox vs.\nMSNBC")), 
                           aes(x=factor(val),
                               col = factor(id, levels = c("Fox vs.\nEntertainment",
                                                           "MSNBC vs.\nEntertainment")),
                               shape = factor(id, levels = c("Fox vs.\nEntertainment",
                                                             "MSNBC vs.\nEntertainment")))) +
    geom_hline(yintercept=0, col = "white") +
    geom_hline(yintercept=0, linetype="dashed", color = grey_dark) +
    geom_errorbar(aes(ymin=min_cilo90, ymax=max_cihi90),
                  width=0, lwd = 1, position = position_dodge(width = 0.5)) +
    geom_errorbar(aes(ymin=min_cilo, ymax=max_cihi),
                  width=0, position = position_dodge(width = 0.5)) +
    geom_point(aes(y=naive),
               position = position_dodge(width = 0.5),
               size = 2) +
    facet_wrap(~ outcome,nrow=1) +
    scale_x_discrete(labels = unique(score_vsent_code_hard$bin)) + 
    xlab("Relative Slant of News Consumption (Hard News Only)") +
    ylab("Average Treatment Effect of\nPartisan Media vs. Entertainment") +
    scale_y_continuous(breaks=seq(-0.2,0.2,0.1),
                       labels=plot_labels(break_val = 0.2, by_val = 0.1)$att,
                       limits = c(-0.225, 0.225),
                       sec.axis = dup_axis(name="",
                                           breaks=seq(-0.2,0.2,0.1),
                                           labels = plot_labels(break_val = 0.2, by_val = 0.1)$share)) +
    scale_colour_manual("Comparison",values=c(red_mit, blue_mit)) +
    scale_shape_manual("Comparison",values=c(16, 17, 15)) +
    theme(axis.text.x = element_text(size = 10, hjust = 0.5, color = "black")))

ggsave(fig4_b, path = plot_path, filename = "fig_4_b.pdf", 
       width=9, height=5.25, dpi = 600)

# Figure 5: Persuasion by stated media preferences ----

# Create labels for each stated preference group
med_pref_labs <- paste0("Prefer\n", c("MSNBC", "Entertainment", "Fox"))

# Generate persuasion estimates for each stated preference group
pref_vsent_fx <- group_vsent_plot(var = "med_pref_num", nbins = 3, 
                                  labels = med_pref_labs, 
                                  weights = FALSE)

# Plot results
(fig5 <- ggplot(na.omit(pref_vsent_fx %>% filter(id != "Fox vs.\nMSNBC")), 
                                 aes(x=factor(val),
                                     col = factor(id, levels = c("Fox vs.\nEntertainment",
                                                                 "MSNBC vs.\nEntertainment")),
                                     shape = factor(id, levels = c("Fox vs.\nEntertainment", 
                                                                   "MSNBC vs.\nEntertainment")))) +
    geom_hline(yintercept=0, col = "white") +
    geom_hline(yintercept=0, linetype="dashed", col = grey_dark) +
    geom_errorbar(aes(ymin=min_cilo90, ymax=max_cihi90),
                  width=0, lwd = 1, position = position_dodge(width = 0.5)) +
    geom_errorbar(aes(ymin=min_cilo, ymax=max_cihi),
                  width=0, position = position_dodge(width = 0.5)) +
    geom_point(aes(y=naive),
               position = position_dodge(width = 0.5),
               size = 2) +
    facet_wrap(~ outcome,nrow=1) +
    scale_x_discrete(labels = unique(pref_vsent_fx$bin)) + 
    xlab("Stated Media Preference") +
    ylab("Average Treatment Effect of\nPartisan Media vs. Entertainment") +
    scale_y_continuous(breaks=seq(-0.2,0.2,0.1),
                       labels=plot_labels(break_val = 0.2, by_val = 0.1)$att,
                       limits = c(-0.225, 0.225),
                       sec.axis = dup_axis(name="",
                                           breaks=seq(-0.2,0.2,0.1),
                                           labels = plot_labels(break_val = 0.2, by_val = 0.1)$share)) +
    scale_colour_manual("Comparison",values=c(red_mit, blue_mit)) +
    scale_shape_manual("Comparison",values=c(16, 17, 15)) +
    theme(axis.text.x = element_text(size = 10, hjust = 0.5, color = "black")))

ggsave(fig5, path = plot_path, filename = "fig_5.pdf", 
       width=9, height=5.25, dpi = 600)
